2.2 Scatterplots, Histograms, Boxplots, Barcharts

Goal: Assess whether there is evidence that the relationship between city and highway mpg

differs by the car's drive type (4-wheel, front, or rear).

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Scatterplot: city mpg (y) against highway mpg (x)
ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point()

# Histogram of city mpg, using the default binning
ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of city mpg within each vehicle class
ggplot(data = mpg, mapping = aes(x = class, y = cty)) +
  geom_boxplot()

# Bar chart with one bar per vehicle class
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar()

# 2.4 Mapping | Aesthetics (color=) # Scatterplots (color=)

# Map color to the drv column. Inside aes() columns are referenced by bare
# name so they are looked up in the plot's data; writing mpg$drv bypasses that
# data (so it misbehaves once the data are filtered or faceted) and titles the
# legend "mpg$drv".
mpg %>% ggplot(aes(x = hwy, y = cty, color = drv)) + geom_point()

# Set (rather than map) the color: outside aes() it is a constant, so every
# point is drawn blue and no legend is produced.
mpg %>% ggplot(aes(x = hwy, y = cty)) + geom_point(color = "blue")

# Histograms (color=)

# City mpg for compact cars only; bars are filled by drive type (mapped) and
# outlined in black (constant).
mpg %>%
  filter(class == "compact") %>%
  ggplot(aes(x = cty, fill = drv)) +
  geom_histogram(color = "black")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same subset with a constant blue fill and a black outline.
mpg %>%
  filter(class == "compact") %>%
  ggplot(aes(x = cty)) +
  geom_histogram(fill = "blue", color = "black")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot (color=)

# Boxes filled by drive type (mapped), all outlined in blue (constant).
ggplot(data = mpg, mapping = aes(x = class, y = cty, fill = drv)) +
  geom_boxplot(color = "blue")

# Constant black fill with a blue outline; nothing is mapped to drv here.
ggplot(data = mpg, mapping = aes(x = class, y = cty)) +
  geom_boxplot(fill = "black", color = "blue")

2.4 Mapping | Aesthetics (size=, alpha=, shape=)

# In each plot the aesthetic is mapped to a bare column name. Using mpg$displ
# or mpg$drv inside aes() would bypass the plot's data and put "mpg$..." in
# the legend title.

# Point size varies with displ
mpg %>% ggplot(aes(x = hwy, y = cty, size = displ)) + geom_point()

# Point transparency varies with displ
mpg %>% ggplot(aes(x = hwy, y = cty, alpha = displ)) + geom_point()

# Point shape varies with drive type
mpg %>% ggplot(aes(x = hwy, y = cty, shape = drv)) + geom_point()

2.5 Geometric Objects (geoms)

Create a plot with just the smoothed line.

# Smoothed trend of city mpg on highway mpg, with no point layer
ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Create a plot that has a different line for each level of drive (drv).

# Mapping linetype to drv splits the data, so one smoothed line is drawn per
# drive type
ggplot(data = mpg, mapping = aes(x = hwy, y = cty, linetype = drv)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Create a plot that has the smoothed line and the points and designate

the type of drive by color… in both the line and the points.

# Global aes(): the mapping given to ggplot() is inherited by every layer, so
# both the smoothed lines and the points are colored by drv
ggplot(
  data = mpg,
  mapping = aes(x = hwy, y = cty, linetype = drv, color = drv)
) +
  geom_smooth() +
  geom_point()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Create a plot that has the points and the smoothed line but the color

designates the class of the car and the lines are differentiated by the drive variable.

# Layer-specific aes(): x and y are shared by both layers, while each layer
# supplies its own color (and, for the smooth, linetype) mapping
ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(mapping = aes(linetype = drv, color = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# 2.6 Label

# Boxplot of city mpg by class with an explicit title and axis labels
ggplot(data = mpg, mapping = aes(x = class, y = cty)) +
  geom_boxplot() +
  labs(title = "Boxplot of ...", x = "Class", y = "City MPG")

2.7 Facet

# One panel per vehicle class: points and a smoothed line in each panel, both
# colored by class, with the line type also varying by class
ggplot(data = mpg, mapping = aes(x = hwy, y = cty, color = class)) +
  geom_point() +
  geom_smooth(mapping = aes(linetype = class)) +
  facet_wrap(~class)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 22.985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.0302
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 22.985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1.0302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 24.035
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.035
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.8765e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 24.035
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.035
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 7.8765e-017
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 1

# Histogram of highway mpg, using the default binning
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# 2.8 Bar Chart # Transformations

# Proportion of cars in each class. after_stat(prop) reads the proportion
# computed by the count stat; it replaces the deprecated ..prop.. notation.
# group = 1 puts all rows in one group so proportions are taken over the whole
# data set instead of within each bar.
mpg %>%
  ggplot(aes(x = class, y = after_stat(prop), group = 1)) +
  geom_bar(stat = "count")

# stat="count" is default
mpg %>% ggplot(aes(x = class)) + geom_bar(stat = "count")

mpg %>% ggplot(aes(x = class)) + geom_bar()

# Error will pop up for below command
# mpg %>% ggplot(aes(x=class,y=cty))+geom_bar()
# "Error: stat_count() must not be used with a y aesthetic"

# Fix: Add stat="identity" so the supplied y values are used as-is. Rows that
# share a class are stacked, so each bar's height is the sum of cty for that
# class (x level).
mpg %>% ggplot(aes(x = class, y = cty)) + geom_bar(stat = "identity")

# Same bars, one panel per class
mpg %>%
  ggplot(aes(x = class, y = cty)) +
  geom_bar(stat = "identity") +
  facet_wrap(~class)

# Position={Stack, Fill, Dodge}

# position="stack" is default
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar()

# Filling by drv with the default position gives stacked segments
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar()

# Identical plot with the default position spelled out
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "stack")

# Dodge: the drv bars within a class sit side by side
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "dodge")

# Fill: stacked bars rescaled to a common height, showing shares of drv
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar(position = "fill")

# Scatterplot # Position = jitter

# Points drawn at their exact coordinates
ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point()

# Same plot with the jitter position adjustment applied to the points
ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(position = "jitter")

# 2.9 Other Coordinate System # coord_flip(), coord_polar()

# Baseline: stacked bar chart in the default coordinate system
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar()

# Same chart with coord_flip() added
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar() +
  coord_flip()

# Same chart with coord_polar() added
ggplot(data = mpg, mapping = aes(x = class, fill = drv)) +
  geom_bar() +
  coord_polar()

US Maps

# install.packages("maps")
library(maps)

# US map: polygons from map_data("usa"), filled blue with a black border
usa <- map_data("usa")
p <- ggplot() +
  geom_polygon(
    data = usa,
    mapping = aes(x = long, y = lat, group = group),
    fill = "blue",
    color = "black"
  ) +
  coord_quickmap()

# Dallas coordinates: a one-row tibble holding longitude, latitude and label
Dallas <- tibble(long = -96.7970, lat = 32.7767, names = "Dallas")

# US map with a yellow Dallas marker and a white text label nudged to the right
p +
  geom_point(
    data = Dallas,
    mapping = aes(x = long, y = lat),
    shape = 21,
    fill = "yellow",
    color = "black",
    size = 5
  ) +
  geom_text(
    data = Dallas,
    mapping = aes(x = long, y = lat, label = names),
    hjust = 0,
    nudge_x = 1,
    color = "white"
  )

World map

# World outline: white polygons with black borders
world <- map_data("world")
ggplot(data = world, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

# World2 map

# Same drawing using the "world2" map data
world2 <- map_data("world2")
ggplot(data = world2, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

Italy Map

# Each map below follows the same recipe: fetch the polygon data with
# map_data(), then draw it as white polygons with black borders.
italy <- map_data("italy")
ggplot(data = italy, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

# France Map

france <- map_data("france")
ggplot(data = france, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

# US state map

state <- map_data("state")
ggplot(data = state, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

# US county map

county <- map_data("county")
ggplot(data = county, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

New Zealand map

# New Zealand outline: white polygons with black borders
nz <- map_data("nz")
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black") +
  coord_quickmap()

2.10 Themes

Excel

# install.packages("ggthemes")
library(ggthemes)

# Jittered city-vs-highway scatterplot drawn with the Excel theme
ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(position = position_jitter()) +
  labs(title = "City MPG v. Highway MPG") +
  theme_excel()

# Bar chart of class counts, filled by class, with the Excel theme
ggplot(data = mpg, mapping = aes(x = class, fill = class)) +
  geom_bar() +
  theme_excel()

# Each section repeats the same two plots (a jittered city-vs-highway
# scatterplot and a bar chart of class counts) and changes only the theme.

# The Economist

ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(position = position_jitter()) +
  labs(title = "City MPG v. Highway MPG") +
  theme_economist()

ggplot(data = mpg, mapping = aes(x = class, fill = class)) +
  geom_bar() +
  theme_economist()

# classic

ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(position = position_jitter()) +
  labs(title = "City MPG v. Highway MPG") +
  theme_classic()

ggplot(data = mpg, mapping = aes(x = class, fill = class)) +
  geom_bar() +
  theme_classic()

# wsj - Wall Street Journal

ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(position = position_jitter()) +
  labs(title = "City MPG v. Highway MPG") +
  theme_wsj()

ggplot(data = mpg, mapping = aes(x = class, fill = class)) +
  geom_bar() +
  theme_wsj()

# pander

ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(position = position_jitter()) +
  labs(title = "City MPG v. Highway MPG") +
  theme_pander()

ggplot(data = mpg, mapping = aes(x = class, fill = class)) +
  geom_bar() +
  theme_pander()

# fivethirtyeight

ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(position = position_jitter()) +
  labs(title = "City MPG v. Highway MPG") +
  theme_fivethirtyeight()

ggplot(data = mpg, mapping = aes(x = class, fill = class)) +
  geom_bar() +
  theme_fivethirtyeight()

# Additional Theme Resources!
# http://www.sthda.com/english/wiki/ggplot2-themes-and-background-colors-the-3-elements
# https://jrnold.github.io/ggthemes/
# https://www.r-bloggers.com/custom-themes-in-ggplot2/
# https://cran.r-project.org/web/packages/ggthemes/ggthemes.pdf

2.11 Interactive and 3-D Plots | Plotly

Interactive Plots | plotly

# install.packages("plotly")
library(ggthemes)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Jittered city-vs-highway scatterplot, stored so it can be handed to plotly
p <- mpg %>%
  ggplot(aes(x = hwy, y = cty)) +
  geom_point(position = "jitter") +
  ggtitle("City MPG v. Highway MPG") +
  theme_excel()

# Pass the plot explicitly. A bare ggplotly() converts whatever last_plot()
# returns, which silently depends on no other ggplot having been built since.
ggplotly(p)

# Histogram of city mpg for compact cars, filled by drive type
p <- mpg[mpg$class == "compact", ] %>%
  ggplot(aes(x = cty, fill = drv)) +
  geom_histogram(color = "blue")

ggplotly(p)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Student Data

#generate the data
# Six students: age, years of math education, name and subject.
age <- c(22, 21, 24, 19, 20, 23)
yrs_math_ed <- c(4, 5, 2, 5, 3, 5)
names <- c("Mary", "Martha", "Kim", "Kristen", "Amy", "Sam")
# "Sociology" was misspelled "Socialogy" in the data.
subject <- c("English", "Math", "Sociology", "Math", "Music", "Dance")
df3 <- data.frame(Age = age, Years = yrs_math_ed, Name = names, Subject = subject)

#plotting code
# One bar per student; stat = "identity" uses Years directly as bar height.
p <- df3 %>%
  ggplot(aes(x = Name, y = Years)) +
  geom_bar(stat = "identity") +
  ggtitle("Distribution of Class")

#enabling interactive piece
ggplotly(p)

3D Plots | plotly

#mpg
# 3-D scatter of the mpg data, colored by drive type:
# x = cty, y = hwy, z = displ.
# The axis titles follow those mappings; the y and z titles were previously
# swapped, labelling hwy as 'Displacement' and displ as 'Highway MPG'.
p <- plot_ly(mpg, x = ~cty, y = ~hwy, z = ~displ, color = ~drv) %>%
  add_markers() %>%
  layout(scene = list(xaxis = list(title = 'City MPG'),
                      yaxis = list(title = 'Highway MPG'),
                      zaxis = list(title = 'Displacement')))
p
#Iris
# 3-D scatter of the iris data, colored by species:
# x = Sepal.Width, y = Sepal.Length, z = Petal.Width.
p1 <- plot_ly(
  iris,
  x = ~Sepal.Width,
  y = ~Sepal.Length,
  z = ~Petal.Width,
  color = ~Species
)
p1 <- add_markers(p1)
# Axis titles for the 3-D scene, one per mapped variable
p1 <- layout(
  p1,
  scene = list(
    xaxis = list(title = 'Sepal Width'),
    yaxis = list(title = 'Sepal Length'),
    zaxis = list(title = 'Petal Width')
  )
)
p1
# Jittered city-vs-highway scatterplot with the Excel theme, converted to an
# interactive plotly widget
p <- ggplot(data = mpg, mapping = aes(x = hwy, y = cty)) +
  geom_point(position = position_jitter()) +
  labs(title = "City MPG v. Highway MPG") +
  theme_excel()
ggplotly(p)